package cn.doitedu.day04

import cn.doitedu.day01.utils.SparkUtil
import org.apache.spark.rdd.RDD

/**
 * @Date 22.4.1
 * @Created by HANGGE
 * @Description Demonstrates the RDD `sample` transformation and prints the two most frequent sampled values.
 */
object C10_转换算子_Sample {
  /**
   * Demo of the RDD `sample` transformation.
   *
   * Draws a sample with replacement from a small in-memory list of integers
   * and prints the two most frequent values of that sample, each as a
   * `(value, count)` tuple.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = SparkUtil.getSc
    try {
      val rdd = sc.parallelize(List(1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6))

      // Sample with replacement, using an explicit seed for the random number generator.
      val res: RDD[Int] = rdd.sample(withReplacement = true, fraction = 0.3, seed = 2)

      // Find the values that occur most often in the sample.
      // reduceByKey sums the per-value counts on each partition before the shuffle,
      // so occurrences are never collected into a per-key List just to measure its size.
      res
        .map(value => (value, 1))
        .reduceByKey(_ + _)
        .sortBy { case (_, count) => -count } // negate the count to sort in descending order
        .take(2)
        .foreach(println)
    } finally {
      // Release the context's resources even if the job fails.
      sc.stop()
    }
  }

}
